File structure and shell commands

# Create the lab05 folder inside labs/ and move into it.
cd labs
mkdir lab05
cd lab05
# Make three subdirectories in one call, then list them.
mkdir data report images
ls
# Write the README in vim, then print it to check the contents.
vim README.md
cat README.md
# Download the players CSV into data/ (-O keeps the remote file name).
cd data
curl -O https://raw.githubusercontent.com/ucb-stat133/stat133-fall-2018/master/data/nba2018-players.csv
ls
# Inspect the download: line/word/byte counts, then the first and last lines.
wc nba2018-players.csv
head nba2018-players.csv
tail nba2018-players.csv

Loading packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readr)

NBA Players Data

# Option 1 (base R): read the CSV, keeping text columns as character vectors.
dat <- read.csv('nba2018-players.csv', stringsAsFactors = FALSE)
# Option 2 (readr): read the same file again; this assignment replaces the
# `dat` created on the previous line.
dat <- read_csv('nba2018-players.csv')
## Parsed with column specification:
## cols(
##   player = col_character(),
##   team = col_character(),
##   position = col_character(),
##   height = col_integer(),
##   weight = col_integer(),
##   age = col_integer(),
##   experience = col_integer(),
##   college = col_character(),
##   salary = col_double(),
##   games = col_integer(),
##   minutes = col_integer(),
##   points = col_integer(),
##   points3 = col_integer(),
##   points2 = col_integer(),
##   points1 = col_integer()
## )

Filtering, slicing, and selecting

slice() allows you to select rows by position:

# Keep the rows at positions 1 through 3.
three_rows <- dat %>% slice(1:3)
three_rows
## # A tibble: 3 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Al Ho… BOS   C            82    245    30          9 Univer… 2.65e7    68
## 2 Amir … BOS   PF           81    240    29         11 <NA>    1.20e7    80
## 3 Avery… BOS   SG           74    180    26          6 Univer… 8.27e6    55
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

filter() allows you to select rows by condition:

# Keep players taller than 85 inches.
gt_85 <- dat %>% filter(height > 85)
gt_85
## # A tibble: 5 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Edy T… CLE   C            87    260    24          1 <NA>    5.14e3     1
## 2 Boban… DET   C            87    290    28          1 <NA>    7.00e6    35
## 3 Krist… NYK   PF           87    240    21          1 <NA>    4.32e6    66
## 4 Roy H… DEN   C            86    270    30          8 George… 5.00e6     6
## 5 Alexi… NOP   C            86    248    28          6 <NA>    4.60e6    39
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

select() allows you to select columns by name:

# Keep only the player and height columns.
player_height <- dat %>% select(player, height)
player_height
## # A tibble: 477 x 2
##    player            height
##    <chr>              <int>
##  1 Al Horford            82
##  2 Amir Johnson          81
##  3 Avery Bradley         74
##  4 Demetrius Jackson     73
##  5 Gerald Green          79
##  6 Isaiah Thomas         69
##  7 Jae Crowder           78
##  8 James Young           78
##  9 Jaylen Brown          79
## 10 Jonas Jerebko         82
## # ... with 467 more rows

Use slice() to subset the data by selecting the first 5 rows

# First five rows of the table.
five_rows <- dat %>% slice(seq_len(5))
five_rows
## # A tibble: 5 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Al Ho… BOS   C            82    245    30          9 Univer… 2.65e7    68
## 2 Amir … BOS   PF           81    240    29         11 <NA>    1.20e7    80
## 3 Avery… BOS   SG           74    180    26          6 Univer… 8.27e6    55
## 4 Demet… BOS   PG           73    201    22          0 Univer… 1.45e6     5
## 5 Geral… BOS   SF           79    205    31          9 <NA>    1.41e6    47
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

Use slice() to subset the data by selecting rows 10, 15, 20, …, 50

# Rows 10, 15, ..., 50: every fifth position from 10 up to 50.
by_five <- dat %>% slice(seq(from = 10, to = 50, by = 5))
by_five
## # A tibble: 9 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Jonas… BOS   PF           82    231    29          6 <NA>    5.00e6    78
## 2 Tyler… BOS   C            84    253    27          4 Univer… 8.00e6    51
## 3 Derri… CLE   PF           80    240    25          5 Univer… 4.02e5    25
## 4 Jorda… CLE   SG           78    185    25          1 Univer… 8.75e5    37
## 5 Larry… CLE   C            83    235    28          5 Virgin… 2.08e5     5
## 6 Cory … TOR   PG           75    193    25          5 Univer… 7.33e6    80
## 7 Jakob… TOR   C            84    248    21          0 Univer… 2.70e6    54
## 8 P.J. … TOR   SF           78    245    31          5 Univer… 5.30e6    24
## 9 Bradl… WAS   SG           77    207    23          4 Univer… 2.21e7    77
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

Use slice() to subset the data by selecting the last 5 rows

# Last five rows. n() is the number of rows, so (n() - 4):n() always ends at
# the final row and spans exactly five positions (473:477 for this table).
last_five <- slice(dat, (n() - 4):n())
last_five
## # A tibble: 5 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Marqu… PHO   PF           82    233    19          0 Univer… 2.94e6    82
## 2 Ronni… PHO   PG           74    190    33         11 Nichol… 4.65e5    14
## 3 T.J. … PHO   SF           80    230    23          2 North … 2.13e6    66
## 4 Tyler… PHO   PG           70    150    21          0 Univer… 9.18e5    61
## 5 Tyson… PHO   C            85    240    34         15 <NA>    1.24e7    47
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

Use filter() to subset those players with height less than 70 inches tall

# Players shorter than 70 inches.
small_70 <- dat %>% filter(height < 70)
small_70
## # A tibble: 2 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Isaia… BOS   PG           69    185    27          5 Univer… 6.59e6    76
## 2 Kay F… CLE   PG           69    176    21          0 Oaklan… 5.43e5    42
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

Use filter() to subset rows of GSW

# All rows for the Golden State Warriors.
nba_gsw <- dat %>% filter(team == "GSW")
nba_gsw
## # A tibble: 16 x 15
##    player team  position height weight   age experience college salary
##    <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl>
##  1 Ander… GSW   C            82    273    34         12 <NA>    1.55e6
##  2 Andre… GSW   SF           78    215    33         12 Univer… 1.11e7
##  3 Damia… GSW   C            84    245    21          0 Vander… 1.17e6
##  4 David… GSW   C            81    250    36         13 Xavier… 1.55e6
##  5 Draym… GSW   PF           79    230    26          4 Michig… 1.53e7
##  6 Ian C… GSW   SG           75    175    25          3 Belmon… 1.02e6
##  7 James… GSW   PF           81    230    24          2 Univer… 9.80e5
##  8 JaVal… GSW   C            84    270    29          8 Univer… 1.40e6
##  9 Kevin… GSW   PF           81    240    28          9 Univer… 2.65e7
## 10 Kevon… GSW   C            81    220    20          1 Univer… 1.18e6
## 11 Klay … GSW   SG           79    215    26          5 Washin… 1.67e7
## 12 Matt … GSW   SF           79    226    36         13 Univer… 3.83e5
## 13 Patri… GSW   SG           79    185    21          0 Univer… 5.43e5
## 14 Shaun… GSW   PG           79    192    31         11 <NA>    5.78e6
## 15 Steph… GSW   PG           75    190    28          7 Davids… 1.21e7
## 16 Zaza … GSW   C            83    270    32         13 <NA>    2.90e6
## # ... with 6 more variables: games <int>, minutes <int>, points <int>,
## #   points3 <int>, points2 <int>, points1 <int>

Use filter() to subset rows of GSW centers

# Narrow the Warriors subset down to centers.
nba_gsw_C <- nba_gsw %>% filter(position == "C")
nba_gsw_C
## # A tibble: 6 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Ander… GSW   C            82    273    34         12 <NA>    1.55e6    14
## 2 Damia… GSW   C            84    245    21          0 Vander… 1.17e6    10
## 3 David… GSW   C            81    250    36         13 Xavier… 1.55e6    68
## 4 JaVal… GSW   C            84    270    29          8 Univer… 1.40e6    77
## 5 Kevon… GSW   C            81    220    20          1 Univer… 1.18e6    53
## 6 Zaza … GSW   C            83    270    32         13 <NA>    2.90e6    70
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

Use filter() and then select() to subset rows of Lakers and then display their names

# Lakers rows first, then just their names.
nba_lal <- dat %>% filter(team == "LAL")
nba_lal_players <- nba_lal %>% select(player)
nba_lal_players
## # A tibble: 14 x 1
##    player           
##    <chr>            
##  1 Brandon Ingram   
##  2 Corey Brewer     
##  3 D'Angelo Russell 
##  4 David Nwaba      
##  5 Ivica Zubac      
##  6 Jordan Clarkson  
##  7 Julius Randle    
##  8 Luol Deng        
##  9 Metta World Peace
## 10 Nick Young       
## 11 Tarik Black      
## 12 Thomas Robinson  
## 13 Timofey Mozgov   
## 14 Tyler Ennis

Use filter() and then select() to display the name and salary of GSW point guards

# Warriors point guards, reduced to name and salary.
nba_gsw_PG <- nba_gsw %>% filter(position == "PG")
player_salary <- nba_gsw_PG %>% select(player, salary)
player_salary
## # A tibble: 2 x 2
##   player             salary
##   <chr>               <dbl>
## 1 Shaun Livingston  5782450
## 2 Stephen Curry    12112359

Find how to select the name, age, and team of players with more than 10 years of experience, making 10 million dollars or less

# More than 10 years of experience and a salary of at most 10 million;
# comma-separated filter() conditions are combined with AND.
nba_10yr10mill <- dat %>% filter(experience > 10, salary <= 1e7)
nba_10yr10millshow <- nba_10yr10mill %>% select(player, age, team)
nba_10yr10millshow
## # A tibble: 36 x 3
##    player              age team 
##    <chr>             <int> <chr>
##  1 Andrew Bogut         32 CLE  
##  2 Dahntay Jones        36 CLE  
##  3 Deron Williams       32 CLE  
##  4 James Jones          36 CLE  
##  5 Kyle Korver          35 CLE  
##  6 Richard Jefferson    36 CLE  
##  7 Jose Calderon        35 ATL  
##  8 Kris Humphries       31 ATL  
##  9 Mike Dunleavy        36 ATL  
## 10 Jason Terry          39 MIL  
## # ... with 26 more rows

Find how to select the name, team, height, and weight of rookie players, 20 years old, displaying only the first five occurrences

# Rookies are players with zero years of experience; keep the 20-year-olds.
# NOTE: the printed table below was produced with a looser filter
# (experience <= 10); re-knit the document to refresh it.
rookie_20 <- filter(dat, experience == 0 & age == 20)
rookienthw <- select(rookie_20, player, team, height, weight)
# Show only the first five matches.
head(rookienthw, 5)
## # A tibble: 5 x 4
##   player          team  height weight
##   <chr>           <chr>  <int>  <int>
## 1 Jaylen Brown    BOS       79    225
## 2 Rashad Vaughn   MIL       78    202
## 3 Myles Turner    IND       83    243
## 4 Justise Winslow MIA       79    225
## 5 Henry Ellenson  DET       83    245

Adding new variables: mutate()

# Small Warriors sample: three columns, five hand-picked rows.
gsw <- dat %>%
  filter(team == "GSW") %>%
  select(player, height, weight) %>%
  slice(c(4, 8, 10, 14, 15))
gsw
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 David West           81    250
## 2 JaVale McGee         84    270
## 3 Kevon Looney         81    220
## 4 Shaun Livingston     79    192
## 5 Stephen Curry        75    190
# Add a height-to-weight ratio column (result is printed, not stored).
gsw %>% mutate(ht_wt = height / weight)
## # A tibble: 5 x 4
##   player           height weight ht_wt
##   <chr>             <int>  <int> <dbl>
## 1 David West           81    250 0.324
## 2 JaVale McGee         84    270 0.311
## 3 Kevon Looney         81    220 0.368
## 4 Shaun Livingston     79    192 0.411
## 5 Stephen Curry        75    190 0.395
# Metric versions of height (inches -> metres) and weight (pounds -> kg).
gsw2 <- gsw %>%
  mutate(
    ht_m = height * 0.0254,
    wt_kg = weight * 0.4536
  )
gsw2
## # A tibble: 5 x 5
##   player           height weight  ht_m wt_kg
##   <chr>             <int>  <int> <dbl> <dbl>
## 1 David West           81    250  2.06 113. 
## 2 JaVale McGee         84    270  2.13 122. 
## 3 Kevon Looney         81    220  2.06  99.8
## 4 Shaun Livingston     79    192  2.01  87.1
## 5 Stephen Curry        75    190  1.90  86.2

Reordering rows: arrange()

# Sort by height, shortest first.
gsw %>% arrange(height)
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 Stephen Curry        75    190
## 2 Shaun Livingston     79    192
## 3 David West           81    250
## 4 Kevon Looney         81    220
## 5 JaVale McGee         84    270
# Sort by height, tallest first.
gsw %>% arrange(desc(height))
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 JaVale McGee         84    270
## 2 David West           81    250
## 3 Kevon Looney         81    220
## 4 Shaun Livingston     79    192
## 5 Stephen Curry        75    190
# Sort by height, breaking ties by weight.
gsw %>% arrange(height, weight)
## # A tibble: 5 x 3
##   player           height weight
##   <chr>             <int>  <int>
## 1 Stephen Curry        75    190
## 2 Shaun Livingston     79    192
## 3 Kevon Looney         81    220
## 4 David West           81    250
## 5 JaVale McGee         84    270

Using the data frame gsw add a new variable product with the product of height and weight

# Append `product` = height times weight to gsw.
gsw <- gsw %>% mutate(product = height * weight)
gsw
## # A tibble: 5 x 4
##   player           height weight product
##   <chr>             <int>  <int>   <int>
## 1 David West           81    250   20250
## 2 JaVale McGee         84    270   22680
## 3 Kevon Looney         81    220   17820
## 4 Shaun Livingston     79    192   15168
## 5 Stephen Curry        75    190   14250

Create a new data frame gsw3, by adding columns log_height and log_weight with the log transformations of height and weight

# Natural-log transforms of height and weight as new columns.
gsw3 <- gsw %>%
  mutate(
    log_height = log(height),
    log_weight = log(weight)
  )
gsw3
## # A tibble: 5 x 6
##   player           height weight product log_height log_weight
##   <chr>             <int>  <int>   <int>      <dbl>      <dbl>
## 1 David West           81    250   20250       4.39       5.52
## 2 JaVale McGee         84    270   22680       4.43       5.60
## 3 Kevon Looney         81    220   17820       4.39       5.39
## 4 Shaun Livingston     79    192   15168       4.37       5.26
## 5 Stephen Curry        75    190   14250       4.32       5.25

Use the original data frame to filter() and arrange() those players with height less than 71 inches tall, in increasing order

# Players under 71 inches, then ordered by increasing height.
small_71 <- dat %>% filter(height < 71)
small_71 %>% arrange(height)
## # A tibble: 4 x 15
##   player team  position height weight   age experience college salary games
##   <chr>  <chr> <chr>     <int>  <int> <int>      <int> <chr>    <dbl> <int>
## 1 Isaia… BOS   PG           69    185    27          5 Univer… 6.59e6    76
## 2 Kay F… CLE   PG           69    176    21          0 Oaklan… 5.43e5    42
## 3 Pierr… DAL   PG           70    180    25          0 Baylor… 1.05e5     8
## 4 Tyler… PHO   PG           70    150    21          0 Univer… 9.18e5    61
## # ... with 5 more variables: minutes <int>, points <int>, points3 <int>,
## #   points2 <int>, points1 <int>

Display the name, team, and salary of the top-5 highest paid players

# Rank everyone by salary (highest first), keep three columns, show the top 5.
top_paid <- dat %>%
  arrange(desc(salary)) %>%
  select(player, team, salary)
top_paid %>% head(5)
## # A tibble: 5 x 3
##   player        team    salary
##   <chr>         <chr>    <dbl>
## 1 LeBron James  CLE   30963450
## 2 Al Horford    BOS   26540100
## 3 DeMar DeRozan TOR   26540100
## 4 Kevin Durant  GSW   26540100
## 5 James Harden  HOU   26540100

Display the name, team, and points3 of the top 10 three-point players

# Rank by points3 (highest first), keep three columns, show the top 10.
top_points3 <- dat %>%
  arrange(desc(points3)) %>%
  select(player, team, points3)
top_points3 %>% head(10)
## # A tibble: 10 x 3
##    player         team  points3
##    <chr>          <chr>   <int>
##  1 Stephen Curry  GSW       324
##  2 Klay Thompson  GSW       268
##  3 James Harden   HOU       262
##  4 Eric Gordon    HOU       246
##  5 Isaiah Thomas  BOS       245
##  6 Kemba Walker   CHO       240
##  7 Bradley Beal   WAS       223
##  8 Damian Lillard POR       214
##  9 Ryan Anderson  HOU       204
## 10 J.J. Redick    LAC       201

Create a data frame gsw_mpg of GSW players that contains variables for player name, experience and min_per_game sorted by min_per_game in descending order

# Minutes per game for the Warriors, highest first.
gsw_mpg <- filter(dat, team == "GSW")
gsw_mpg <- mutate(gsw_mpg, min_per_game = minutes / games)
# Keep only the three requested columns (one select() is sufficient).
gsw_mpg <- select(gsw_mpg, player, experience, min_per_game)
gsw_mpg <- arrange(gsw_mpg, desc(min_per_game))
gsw_mpg
## # A tibble: 16 x 3
##    player               experience min_per_game
##    <chr>                     <int>        <dbl>
##  1 Klay Thompson                 5        34.0 
##  2 Stephen Curry                 7        33.4 
##  3 Kevin Durant                  9        33.4 
##  4 Draymond Green                4        32.5 
##  5 Andre Iguodala               12        26.3 
##  6 Matt Barnes                  13        20.5 
##  7 Zaza Pachulia                13        18.1 
##  8 Shaun Livingston             11        17.7 
##  9 Patrick McCaw                 0        15.1 
## 10 Ian Clark                     3        14.8 
## 11 David West                   13        12.6 
## 12 JaVale McGee                  8         9.60
## 13 James Michael McAdoo          2         8.79
## 14 Damian Jones                  0         8.5 
## 15 Kevon Looney                  1         8.43
## 16 Anderson Varejao             12         6.57

Summarizing values with summarise()

# Mean salary over all players, as a one-row table.
dat %>% summarise(avg_salary = mean(salary))
## # A tibble: 1 x 1
##   avg_salary
##        <dbl>
## 1   5804697.
# Several salary statistics at once: minimum, median, mean, and maximum.
dat %>%
  summarise(
    min = min(salary),
    median = median(salary),
    avg = mean(salary),
    max = max(salary)
  )
## # A tibble: 1 x 4
##     min  median      avg      max
##   <dbl>   <dbl>    <dbl>    <dbl>
## 1  5145 3000000 5804697. 30963450

Grouped operations

# Mean salary within each team.
dat %>%
  group_by(team) %>%
  summarise(avg_salary = mean(salary))
## # A tibble: 30 x 2
##    team  avg_salary
##    <chr>      <dbl>
##  1 ATL     5494447.
##  2 BOS     6127673.
##  3 BRK     4011351.
##  4 CHI     5781368.
##  5 CHO     5531548.
##  6 CLE     7069699.
##  7 DAL     5157128.
##  8 DEN     4648719.
##  9 DET     6871632.
## 10 GSW     6265160.
## # ... with 20 more rows
# Mean salary within each position.
dat %>%
  group_by(position) %>%
  summarise(avg_salary = mean(salary))
## # A tibble: 5 x 2
##   position avg_salary
##   <chr>         <dbl>
## 1 C          6529906.
## 2 PF         5801127.
## 3 PG         5601217.
## 4 SF         6042455.
## 5 SG         5114178.
# Mean height and weight per position, tallest position first.
dat %>%
  group_by(position) %>%
  summarise(
    avg_height = mean(height),
    avg_weight = mean(weight)
  ) %>%
  arrange(desc(avg_height))
## # A tibble: 5 x 3
##   position avg_height avg_weight
##   <chr>         <dbl>      <dbl>
## 1 C              83.2       251.
## 2 PF             81.4       235.
## 3 SF             79.5       220.
## 4 SG             77.0       204.
## 5 PG             74.3       189.

Use summarise() to get the largest height value

# Largest height value in the table.
dat %>% summarise(max_height = max(height))
## # A tibble: 1 x 1
##   max_height
##        <dbl>
## 1         87

Use summarise() to get the standard deviation of points3

# Standard deviation of points3 across all players.
dat %>% summarise(sd_points3 = sd(points3))
## # A tibble: 1 x 1
##   sd_points3
##        <dbl>
## 1       55.1

Use summarise() and group_by() to display the median of three-points, by team

# Median of points3 within each team.
dat %>%
  group_by(team) %>%
  summarise(median_points3 = median(points3))
## # A tibble: 30 x 2
##    team  median_points3
##    <chr>          <dbl>
##  1 ATL             32  
##  2 BOS             46  
##  3 BRK             36  
##  4 CHI             28.5
##  5 CHO             13  
##  6 CLE             26.5
##  7 DAL             18  
##  8 DEN             46  
##  9 DET             28  
## 10 GSW             10.5
## # ... with 20 more rows

Display the average triple points by team, in ascending order, of the bottom-5 teams

# Team averages of points3, sorted ascending; the first five rows are the
# five lowest-averaging teams.
dat %>%
  group_by(team) %>%
  summarise(avg_points3 = mean(points3)) %>%
  arrange(avg_points3) %>%
  head(5)
## # A tibble: 5 x 2
##   team  avg_points3
##   <chr>       <dbl>
## 1 NOP          32.4
## 2 PHO          33.5
## 3 ORL          34.3
## 4 SAC          35.1
## 5 CHI          35.3

Obtain the mean and standard deviation of age for Power Forwards with between 5 and 10 years of experience (inclusive)

# Power forwards with 5 to 10 years of experience, both endpoints included.
# NOTE: the printed summary below came from a filter that matched only
# experience == 5 or experience == 10; re-knit to refresh the numbers.
pf_5_10 <- filter(dat, position == "PF" & experience >= 5 & experience <= 10)
# Mean and standard deviation of age for that subset.
summarise(pf_5_10, mean_of_pf510 = mean(age), standard_deviation_pf510 = sd(age))
## # A tibble: 1 x 2
##   mean_of_pf510 standard_deviation_pf510
##           <dbl>                    <dbl>
## 1            28                     2.83

First contact with ggplot()

# scatterplot (option 1): aesthetics mapped inside the geom
ggplot(data = dat) +
  geom_point(mapping = aes(x = points, y = salary))

# scatterplot (option 2): aesthetics mapped once in ggplot()
ggplot(data = dat, mapping = aes(x = points, y = salary)) +
  geom_point()

# scatterplot with point color set by position
ggplot(data = dat, mapping = aes(x = points, y = salary)) +
  geom_point(mapping = aes(color = position))

# scatterplot with color set by position and size set by points3
ggplot(data = dat, mapping = aes(x = points, y = salary)) +
  geom_point(mapping = aes(color = position, size = points3))

# same plot, with alpha = 0.7 so overlapping points show through
ggplot(data = dat, mapping = aes(x = points, y = salary)) +
  geom_point(
    mapping = aes(color = position, size = points3),
    alpha = 0.7
  )

Use the data frame gsw to make a scatterplot of height and weight

# Height vs. weight for the gsw sample, one color per player.
ggplot(data = gsw, mapping = aes(x = height, y = weight)) +
  geom_point(mapping = aes(color = player), alpha = 0.4)

Find out how to make another scatterplot of height and weight, using geom_text() to display the names of the players

# Same scatterplot with player names drawn as text, nudged off the points.
ggplot(data = gsw, mapping = aes(x = height, y = weight)) +
  geom_point(mapping = aes(color = player), alpha = 0.4) +
  geom_text(mapping = aes(label = player), nudge_x = 1, nudge_y = 1)

Get a scatterplot of height and weight, for ALL the warriors, displaying their names with geom_label()

# Every Warriors player, labelled with geom_label().
gsw_all <- dat %>% filter(team == "GSW")
ggplot(data = gsw_all, mapping = aes(x = height, y = weight)) +
  geom_point(mapping = aes(color = player), alpha = 0.3) +
  geom_label(
    mapping = aes(label = player),
    nudge_x = 1,
    nudge_y = 1,
    alpha = 0.1
  )

Get a density plot of salary for all NBA players

# Density of salary over all players (Gaussian kernel).
ggplot(data = dat, mapping = aes(x = salary)) +
  geom_density(kernel = "gaussian", fill = "pink")

Get a histogram of points2 with binwidth of 50 for all NBA players

# Histogram of points2 in bins 50 wide.
ggplot(data = dat, mapping = aes(x = points2)) +
  geom_histogram(binwidth = 50, fill = "lavender")

Get a barchart of the position frequencies for all NBA players

# Bar chart counting players in each position.
ggplot(data = dat, mapping = aes(x = position)) +
  geom_bar(fill = "light blue")

Make a scatterplot of experience and salary of all Centers and use geom_smooth() to add a regression line

# Centers only: experience vs. salary with a linear-model fit line.
centers <- dat %>% filter(position == "C")
ggplot(data = centers, mapping = aes(x = experience, y = salary)) +
  geom_point() +
  geom_smooth(method = lm)

Repeat the same scatterplot of experience and salary of all Centers, but now use geom_smooth() to add a loess line (i.e. smooth line)

# Same scatterplot of centers, smoothed with loess instead of a straight line.
ggplot(data = centers, mapping = aes(x = experience, y = salary)) +
  geom_point() +
  geom_smooth(method = loess)

Faceting

# points vs. salary, one wrapped panel per position
ggplot(data = dat, mapping = aes(x = points, y = salary)) +
  geom_point() +
  facet_wrap(~ position)

# positions laid out as columns, with a loess smoother in each panel
ggplot(data = dat, mapping = aes(x = points, y = salary)) +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  geom_smooth(method = loess) +
  facet_grid(~ position)

# positions laid out as rows, with a loess smoother in each panel
ggplot(data = dat, mapping = aes(x = points, y = salary)) +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  geom_smooth(method = loess) +
  facet_grid(position ~ .)

Make scatterplots of experience and salary faceting by position

# experience vs. salary, one row of panels per position, loess smoother added
ggplot(data = dat, mapping = aes(x = experience, y = salary)) +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  geom_smooth(method = loess) +
  facet_grid(position ~ .)

Make scatterplots of experience and salary faceting by team

# experience vs. salary, one wrapped panel per team
ggplot(data = dat, mapping = aes(x = experience, y = salary)) +
  geom_point() +
  facet_wrap(team ~ .)

Make density plots of age faceting by team

# Density of age (not salary), one wrapped panel per team.
ggplot(dat, aes(age)) +
  geom_density(kernel = "gaussian", fill = "snow") +
  facet_wrap(team ~ .)

Make scatterplots of height and weight faceting by position

# height vs. weight by position, wrapped panels; a plain point layer is drawn
# first and a position-colored layer on top of it
ggplot(data = dat, mapping = aes(x = height, y = weight)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  facet_wrap(position ~ .)

# same layers, with positions stacked as grid rows
ggplot(data = dat, mapping = aes(x = height, y = weight)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  facet_grid(position ~ .)

Make scatterplots of height and weight, with a 2-dimensional density, geom_density2d(), faceting by position

# height vs. weight with 2-d density contours on top, wrapped by position
ggplot(data = dat, mapping = aes(x = height, y = weight)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  geom_density2d() +
  facet_wrap(position ~ .)

# same layers, with positions stacked as grid rows
ggplot(data = dat, mapping = aes(x = height, y = weight)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  geom_density2d() +
  facet_grid(position ~ .)

Make a scatterplot of experience and salary for the Warriors, but this time add a layer with theme_bw() to get a simpler background

# Warriors only: experience vs. salary by position, using the black-and-white theme.
warriors <- dat %>% filter(team == "GSW")
ggplot(data = warriors, mapping = aes(x = experience, y = salary)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  facet_grid(position ~ .) +
  theme_bw()

Repeat any of the previous plots, but now add a layer with another theme, e.g. theme_minimal(), theme_dark(), theme_classic()

# Warriors plot with theme_minimal()
ggplot(data = warriors, mapping = aes(x = experience, y = salary)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  facet_grid(position ~ .) +
  theme_minimal()

# Warriors plot with theme_dark()
ggplot(data = warriors, mapping = aes(x = experience, y = salary)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  facet_grid(position ~ .) +
  theme_dark()

# Warriors plot with theme_classic()
ggplot(data = warriors, mapping = aes(x = experience, y = salary)) +
  geom_point() +
  geom_point(mapping = aes(color = position), alpha = 0.7) +
  facet_grid(position ~ .) +
  theme_classic()

More shell commands

# Move into the lab's images folder and list it in several ways.
cd Desktop/stat133/demo-repo/labs/lab05/images
ls
# -l: long format; -lt: long format sorted by modification time; -r: reverse order.
ls -l
ls -lt
ls -r
# Create a sibling directory for the copies (mkdir, not "mkdr").
mkdir ../copies
# Copy one image, then every PNG, into it.
cp unnamed-chunk-32-1.png ../copies
cp *.png ../copies
# Rename two of the copied files.
cd ../copies
mv unnamed-chunk-32-1.png renamed-chunk-32-1.png
mv unnamed-chunk-32-2.png renamed-chunk-32-2.png
# From report/, rename the copies directory itself.
cd ../report
mv ../copies ../copy-files
# Delete one file, then everything matching *.*, then the now-empty directory.
rm ../copy-files/unnamed-chunk-32-3.png
rm ../copy-files/*.*
rmdir ../copy-files